* Session option: do not pause long output with the --more-- prompt.
set more off

* Directory globals used by every section below.
*   projectdir    : project root
*   datadir       : main data folder
*   intermed      : intermediate files written by this do-file
*   alex_transfer : folder holding the transferred shock file
global projectdir "~"
global datadir "$projectdir/data"
global intermed "$projectdir/data/intermediate_files"
global alex_transfer  "$projectdir/data/transfer"

*******SETUP*******


*** Create PI variable
* Purpose: from the employee-level award file, pick one PI per award, then
* assign each employee-year the PI of the award(s) on which the employee has
* the largest summed proportion. Output: "$intermed/pi" with variables
* pik, pinumber, year.

use "$datadir/raw_pulls/umetrics/FSRDC_2018/core_employee2019q1_pik.dta", clear
gen byte faculty = inlist(umetrics_occ, "Faculty", "faculty")
ren institution_id submit_university

* Clamp proportion to [0,1]; missing values are left missing.
replace proportion=0 if proportion<0
replace proportion=1 if proportion>1 & proportion<.

* Total faculty proportion of each employee on each award.
* total() is the documented name of the legacy egen sum() function.
bysort unique_award emp_number: egen temp=total(faculty*proportion)

* Within each award, put the row with the largest faculty total first.
* NOTE(review): rows tied on every sort key are ordered arbitrarily by Stata,
* so the row ranked first can differ between runs when such ties exist.
gsort unique_award -temp fte_status period_end_date period_start_date -proportion
by unique_award: gen temp2=_n

* Award-level lookup: the top-ranked row of each award, kept only when that
* row itself is a faculty row. pinumber is a numeric id for emp_number.
preserve
keep if temp2==1 & faculty==1
egen pinumber=group(emp_number)
keep unique_award pinumber
save "$intermed/pi_award", replace
restore

merge m:1 unique_award using "$intermed/pi_award", keep(1 3) nogen

* Drop rows whose award has no PI. The variable name is written out in full:
* the former abbreviation (pi) fails when varabbrev is off or when another
* variable name starts with the same letters.
drop if pinumber==.

* Full variable name used here as well (formerly the abbreviation period_end).
gen year_end=year(period_end_date)

* For each employee-year, keep the PI with the largest summed proportion.
* NOTE(review): ties in temp3 are broken arbitrarily by the sort.
bysort emp_number year_end pinumber: egen temp3=total(proportion)
gsort emp_number year_end -temp3
by emp_number year_end: gen temp4=_n

keep if temp4==1
ren year_end year
keep emp_number pinumber year
ren emp_number pik

save "$intermed/pi", replace
 // This is a pik-year level dataset: one row per pik and year. A pik can be
 // associated with different PI numbers in different years.


*** 4 Merge PI variable to main dataset
* Attach pinumber to the job-history file by pik and year. Rows that exist
* only in the PI file are dropped; job-history rows without a PI get 0.
u "$intermed/pi", clear
merge 1:m pik year using "$datadir/jobhist_lehd_E.dta"
drop if _merge==1
replace pinumber=0 if pinumber==.
drop _merge

save "$datadir/jobhist_lehd_E_20220614_pos_shock.dta", replace

* The data in memory are identical to the file just saved, so the file is
* not reloaded before continuing.

* pinumber_infill: pinumber with gaps filled within pik.
*   1) carry the most recent earlier PI forward in time;
*   2) fill rows before a pik's first PI with that first PI;
*   3) piks with no PI in any row keep pinumber (which is 0 there).
* Each fill is a single by-group pass, so it does not depend on how many rows
* a pik has. The earlier fixed 35-pass loop moved the backward fill only one
* row per pass and left anything beyond 35 leading rows unfilled.
gen pinumber_infill = pinumber if pinumber!=0

* Step 1: replace runs in observation order, so the value cascades forward.
bysort pik (year): replace pinumber_infill = pinumber_infill[_n-1] if missing(pinumber_infill)

* Step 2: the same cascade in reverse time order fills the leading gap.
gsort pik -year
by pik: replace pinumber_infill = pinumber_infill[_n-1] if missing(pinumber_infill)

* Restore the pik-year ordering before saving.
sort pik year

* Step 3.
replace pinumber_infill = pinumber if missing(pinumber_infill)

save "$datadir/jobhist_lehd_E_20220614_pos_shock.dta", replace
 
 

* 5 Add the new shocks and matching approach
// Treatment: people who have a temporary positive shock.
// Control: people with no large positive shocks.

****************************************
*******DEFINE POS SHOCKS************
****************************************



*** ADD SHOCKS TO MAIN DATA
use "$datadir/jobhist_lehd_E_20220614_pos_shock.dta", clear
merge m:1 iris_employee_number using "$alex_transfer/employee_shocks_positive.dta"
* NOTE: only observations found in both files are kept. Per the original
* note, there is no reason to keep anything but treatment and controls here,
* as it just makes things complicated later.
keep if _merge==3
drop _merge

* Add occupational categorical var and new outcome variables.
* When several occupation flags equal 1, the highest category wins
* (other staff, then undergrad, then grad/postdoc, then faculty).
* Rows with no flag equal to 1 are left missing.
gen occup_cats = cond(occup_other_staff==1, 4, ///
                 cond(occup_undergradstudent==1, 3, ///
                 cond(occup_gradpostdoc==1, 2, ///
                 cond(occup_faculty==1, 1, .))))


** Make variables for event study
* tau: years relative to the shock year; fixed at 0 when treated==0.
gen tau = cond(treated==0, 0, year-year_shock)
*tab tau, gen(shock_dummies)
* post: 1 when tau is non-missing and greater than 1, else 0.
* NOTE(review): the cutoff is tau>1, so tau==0 and tau==1 count as not post;
* confirm this is the intended window.
gen post = (tau!=.) & (tau>1)
save "$datadir/jobhist_lehd_E_20220614_pos_shock.dta", replace


*** Add entrep data ******
* Inverse-age weights built from the firm-age variables:
*   missing age       -> 0
*   age <= 0          -> 1
*   age >  0          -> 1/(age+1)
gen ht_overage = cond(missing(firmage_ht), 0, ///
                 cond(firmage_ht<=0, 1, 1/(firmage_ht+1)))

gen overage = cond(missing(firmage_priv), 0, ///
              cond(firmage_priv<=0, 1, 1/(firmage_priv+1)))

* Where a count_ variable is 0, overwrite it with its un-prefixed counterpart.
foreach stub in ht_mode_start_2012 start ht_mode_young_2012 {
    replace count_`stub' = `stub' if count_`stub'==0
}

compress
save "$datadir/jobhist_lehd_E_20220614_pos_shock.dta", replace


** Fields & R1 Univs
* Four field indicators built from fieldid. Each later group explicitly
* excludes the earlier ones, and field_other is everything not in the
* first three groups.
gen field_science = inlist(fieldid, 4, 5, 12, 15, 16)
gen field_engin = inlist(fieldid, 8) & field_science==0
gen field_bioMedPharma = inlist(fieldid, 2, 10, 13) & field_science==0 & field_engin==0
gen field_other = (field_science==0) & (field_engin==0) & (field_bioMedPharma==0)

* R1 flag: both carnegie and alt_carnegie_max equal 15.
* R1_if_univ holds the same flag but is defined only where
* alt_univ_ein_max==1 and is missing elsewhere.
gen R1_if_univ = (carnegie==15) & (alt_carnegie_max==15) if alt_univ_ein_max==1
gen R1 = (carnegie==15) & (alt_carnegie_max==15)

* Declare the panel structure, then write the final file.
tsset emp_num year
save "$datadir/jobhist_lehd_E_20220614_pos_shock.dta", replace



